Homeownership is one of the most important ways of accumulating wealth. In New York City, property values are high, and thus homeownership is out of reach for many. Using Home Mortgage Disclosure Act data, I explore the demographics of mortgage applicants and the various outcomes of their applications in order to better understand who receives loans and who does not. Looking at how outcomes change over time and by group allows us to better understand the way opportunities within the city are changing.
#Loan applications are split fairly evenly among Manhattan, Brooklyn, and Queens, with White applicants representing a large share of the applications. The Unknown category is made up of applicants who did not disclose their race in their loan applications. Disparities in income between boroughs are shown in the second graph.
# Treemap of application counts: boroughs as the outer rectangles, with each
# borough subdivided by applicant race.
hmda_ny_5yrs %>%
  group_by(boro, race_alternative) %>%
  summarise(n = n()) %>%
  treemap(
    index = c("boro", "race_alternative"),
    vSize = "n",
    title = "Loan Applicants Organized by Borough and Race \nHMDA Data 2013-2017",
    fontsize.title = 24,
    fontsize.labels = c(20, 15),
    fontcolor.labels = "#F2F5F2",
    border.col = "#F2F5F2",
    align.labels = list(c("left", "top"), c("right", "bottom")),
    palette = c("#FD7400", "#BF4182", "#580F45", "#0D177F", "#1f8a70")
  )
# Treemap of application counts: boroughs as the outer rectangles, with each
# borough subdivided by applicant income bin.
hmda_ny_5yrs %>%
  group_by(boro, income_bins) %>%
  summarise(n = n()) %>%
  treemap(
    index = c("boro", "income_bins"),
    vSize = "n",
    title = "Loan Applicants Organized by Borough and Income Quartiles \nHMDA Data 2013-2017",
    fontsize.title = 24,
    fontsize.labels = c(25, 15),
    fontcolor.labels = "#F2F5F2",
    border.col = "#F2F5F2",
    align.labels = list(c("left", "top"), c("right", "bottom")),
    palette = c("#FD7400", "#BF4182", "#580F45", "#0D177F", "#1f8a70")
  )
# Income differences by applicant sex: add a winsorized copy of applicant
# income (probs = 1% / 99%) for use in the boxplot of income by sex.
# NOTE(review): winsorize() is not defined in this part of the file;
# presumably it comes from an attached package -- confirm.
hmda_ny_5yrs <- mutate(
  hmda_ny_5yrs,
  win_income = winsorize(applicant_income_000s, probs = c(.01, .99))
)
# Boxplots of winsorized income by applicant sex, one box per loan outcome.
# Only applications without a co-applicant are kept, and only applicants whose
# recorded sex is "Female" or "Male".
ggplot(
  filter(
    hmda_ny_5yrs,
    applicant_sex_name %in% c("Female", "Male"),
    co_applicant_ethnicity_name == "No co-applicant"
  ),
  aes(x = applicant_sex_name, y = win_income, fill = outcome)
) +
  # alpha has to be set on the layer: ggplot() silently ignores extra
  # arguments, so the transparency previously passed there was never applied.
  geom_boxplot(alpha = .3, outlier.size = .5) +
  scale_y_log10(expand = c(0, 0)) +
  scale_fill_manual(values = c("#1f8a70", "#E8291A", "#FFE11A")) +
  theme(
    text = element_text(family = "Meiryo"),
    panel.background = element_rect(fill = "#F2F5F2"),
    panel.grid.major = element_line(color = "#404040"),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(face = "italic")
  ) +
  labs(
    title = "On Average, Male Applicants have Higher Incomes \nthan Female Applicants",
    subtitle = "Income by Gender, grouped by Loan Outcome",
    caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017",
    x = "Gender of Applicant",
    y = "Income (in $1,000s, Log Scale)",
    fill = "Outcome"
  )
# Approval rates by Neighborhood Tabulation Area (NTA) ----

# Number of loan applications per NTA.
apps_by_nta <- hmda_ny_5yrs %>%
  group_by(ntaname) %>%
  summarise(total_apps = n())

# Number of approved applications per NTA. NTAs without a single approval do
# not appear in this table.
approval_counts_by_nta <- hmda_ny_5yrs %>%
  filter(outcome == "Approved") %>%
  group_by(ntaname) %>%
  summarise(total_approved = n())

# Approval rate in percent. The table is built from every NTA that has
# applications, so an NTA with no approvals is kept with a rate of 0 instead
# of being dropped by the join.
approval_rates_by_nta <- apps_by_nta %>%
  left_join(approval_counts_by_nta, by = "ntaname") %>%
  mutate(
    total_approved = coalesce(total_approved, 0L),
    approval_rate = total_approved / total_apps * 100
  )

# Attach the rates to the NTA polygons. Polygons without a matching ntaname
# keep NA for the rate columns.
nta_merged <- nta_shape %>%
  left_join(approval_rates_by_nta, by = "ntaname") %>%
  st_as_sf()

# Bin the rates for a discrete fill scale. The intervals are (40,50], (50,60],
# (60,70] and (70,80]; any rate outside (40, 80] becomes NA.
nta_merged$breaks <- cut(nta_merged$approval_rate, c(40, 50, 60, 70, 80))
# Plot: binned approval rates by NTA ####
ggplot(nta_merged) +
  geom_sf(aes(fill = breaks), color = "white", lwd = 0) +
  scale_fill_manual(
    values = c("#ffffd4", "#f7c48f", "#e68a4d", "#cc4c02")
  ) +
  guides(
    fill = guide_legend(title = "Percent of \nLoan Applications \nApproved")
  ) +
  labs(
    title = "Most Neighborhoods have Loan Approval Rates between \n50% and 60% ",
    subtitle = "Loan Approval Rates by Neighborhood Tabulation Area (NTA)",
    caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017"
  ) +
  # Strip all axis decoration so only the map and the legend remain.
  theme(
    text = element_text(family = "Meiryo"),
    plot.subtitle = element_text(face = "italic"),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    legend.position = c(0.25, .75)
  )
The highest approval rates are in Bay Terrace/Clearview and Forest Hills, Queens, and Prospect Heights, Brooklyn. While Bay Terrace and Prospect Heights are known as affluent neighborhoods, Forest Hills is known for being diverse and middle-income. The lowest approval rates are found in low-income neighborhoods such as Far Rockaway, Queens; Parkchester, Bronx; and Starrett City, Brooklyn.
# Approval rates by race and census tract ----

# Approved applications per tract and race. Tract/race groups without a single
# approval do not appear in this table.
approval_counts_by_race <- hmda_ny_5yrs %>%
  filter(outcome == "Approved") %>%
  group_by(census_tract_number, applicant_race_name_1) %>%
  summarise(total_approved = n()) %>%
  ungroup()

# All applications per tract and race, with the approval rate as a proportion.
# Groups with no approvals come back from the join as NA; they are set to 0
# so they count as a 0% approval rate rather than as missing data.
df_for_chloropleth_race_approval <- hmda_ny_5yrs %>%
  group_by(census_tract_number, applicant_race_name_1) %>%
  summarise(total_apps = n()) %>%
  ungroup() %>%
  left_join(
    approval_counts_by_race,
    by = c("census_tract_number", "applicant_race_name_1")
  ) %>%
  mutate(
    total_approved = coalesce(total_approved, 0L),
    approval_rate = total_approved / total_apps
  )

# Attach tract geometries.
# NOTE(review): the join key is the tract number alone. If tract numbers
# repeat across boroughs/counties, rows from different tracts would be pooled
# and matched to several polygons -- confirm that census_tract_number and
# ct2010 are unique citywide and use the same format.
census_tract_merged <- df_for_chloropleth_race_approval %>%
  left_join(census_tracts, by = c("census_tract_number" = "ct2010")) %>%
  st_as_sf()

# Bin the rates for a discrete fill scale. include.lowest = TRUE closes the
# first interval on the left so that a rate of exactly 0 falls in the lowest
# bin instead of becoming NA.
census_tract_merged$breaks <- cut(
  census_tract_merged$approval_rate,
  breaks = c(0, .2, .4, .6, .8, 1),
  include.lowest = TRUE
)
# Plot: binned approval rates by census tract, one panel per applicant race ####
ggplot(census_tract_merged) +
  geom_sf(aes(fill = breaks), color = "white", lwd = 0) +
  facet_wrap(~applicant_race_name_1) +
  scale_fill_manual(
    values = c("#ffffd4", "#fdd2c0", "#f3a683", "#e27a48", "#cc4c02")
  ) +
  guides(fill = guide_legend(title = "Approval Rate")) +
  labs(
    title = "Asians and Whites have high loan approval rates across New York City",
    subtitle = "Other racial groups have lower rates",
    caption = "Home Mortgage Disclosure Act Data, CFPB, 2013-2017"
  ) +
  # Strip all axis decoration so only the maps and the legend remain.
  theme(
    text = element_text(family = "Meiryo"),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )